* This file takes 2011-2013 inpatient data and produces variables to capture inpatient activity, costs and in-hospital outcomes following an ED visit

set more off

forval i=1(1)3 {

	if `i'==1{
	local datayear 1112
	local year 2011
	local def=1.101
	}
	
	if `i'==2{
	local datayear 1213
	local year 2012
	local def=1.078
	}
	
	if `i'==3{
	local datayear 1314
	local year 2013
	local def=1.060
	}
	
use "$sourcedata/ip_`year'.dta", clear

* exclusions: remove duplicates (epikey should be unique)
bys epikey: gen z=_n
keep if z==1

* exclusions: remove duplicated episodes (should not have multiple episodes with same epiorder)
gen admidate_string = mdy(admidate_month, admidate_day, admidate_year)
bys extract admidate_string epiorder: gen x=_n
drop if x>1

* exclusions: keep only emergency admissions
cap destring admimeth, replace force		// only needed for 2013 data
keep if admimeth>20 & admimeth<30

* exclusions: drop those admitted before A&E data starts 
gen finyear=`year'
drop if admidate_year<`year'

* obtain dates and discharge method (death) from final episode of each spell
foreach var in disdate_day disdate_month disdate_year dismeth {
gen `var'_temp = `var' if spelend=="Y"
egen `var'_f = max(`var'_temp), by(extract admidate_string)
}

replace disdate_year_f=. if disdate_year_f==1600

* length of stay for spell
gen disdate_f = mdy(disdate_month_f, disdate_day_f, disdate_year_f)
gen admidate_f = mdy(admidate_month, admidate_day, admidate_year)
gen spell_los = disdate_f - admidate_f
replace spell_los=. if spell_los<0

* number of procedures per episode
foreach var of varlist opertn_* {
	replace `var' = trim(lower(`var'))
	replace `var' = subinstr(`var',"-","",.)
	replace `var' = "" if substr(`var',1,1)=="z" | substr(`var',1,1)=="y" | substr(`var',1,1)=="v"	// remove descriptive codes
	}

rename opertn_0* opertn_*
forv x = 1/24 {
	forv y = 1/24 {
		replace opertn_`x' = "" if opertn_`x'==opertn_`y' & `x'!=`y'	// removes duplicates within episode
		}
	}
	
egen number_proc = rownonmiss(opertn_*), strok		// sums procedures for a given episode

* add cost data per episode
merge m:1 sushrg finyear using "$inputdata/full_tariffs.dta"
drop if _merge==2
drop _merge

gen trust_code=substr(sitetret,1,3)
merge m:1 trust_code finyear using "$inputdata/mff_0714.dta"
drop if _merge==2
replace mff=1 if _merge==1
drop _merge

gen cost_em = em_tariff*mff + max(epidur - em_trimdays,0)*dayrate
gen missing_cost = cost_em==.

* collapse and sum up costs and procedures across episodes 
sort extract_hesid epiorder
collapse (sum) cost_em number_proc (first) spell_los dismeth_f tretspef opertn_1 opertn_2 opertn_3 opertn3 opertn_4 opertn_5 opertn_6 opertn_7 opertn_8 opertn_9 opertn_10 ///
opertn_11 opertn_12 opertn_13 opertn_14 opertn_15 opertn_16  (max) missing_cost, by(extract admidate_f)

* set spell costs to missing if missing for at least one episode in the spell
replace cost_em = . if missing_cost
drop missing_cost

* helper for merge
gen admit = 1

* Convert prices into US dollars (use US Treasury exchange rate from 31st Dec 2017, 1 USD to #0.74, or #1gbp to 1.351
local ex=1.351
	
foreach var in cost_em{
replace `var' = `var'*`def'*`ex'
}

* save and tidy up
compress
save "$saveddata/data_inpat`year'.dta", replace

}
